#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import bs4
import requests

import re

URL = "http://www.yoka.com/dna/222/117/index.html"

# Fetch the page. Without a timeout requests waits indefinitely on a stalled
# server, so bound the wait and fail early on HTTP error statuses.
response = requests.get(URL, timeout=10)
response.raise_for_status()
# NOTE(review): response.text is decoded with the charset requests infers from
# the HTTP headers; if the text comes out garbled, confirm the page's real
# encoding before trusting the regex below.
soup = bs4.BeautifulSoup(response.text, "lxml")

# Text of the first <p class="desc"> element.
_desc_tag = soup.find('p', {'class': "desc"})
if _desc_tag is None:
    raise ValueError('no <p class="desc"> element found at ' + URL)
introduction = _desc_tag.text

# Link text inside the second <span class="info"> element
# (presumably the zodiac sign, judging by the variable name -- TODO confirm).
_info_spans = soup.find_all('span', {'class': 'info'})
if len(_info_spans) < 2 or _info_spans[1].a is None:
    raise ValueError('second <span class="info"> with a link not found at ' + URL)
zodic = _info_spans[1].a.text

# Expected shape at the very start of the text: "<name>，YYYY年M月D日".
name_date = re.compile(r'(?P<name>\w+)，\d{4}年(?P<month>\d{1,2})月(?P<day>\d{1,2})日')
a = name_date.match(introduction)
if a is None:
    # match() anchors at the start of the string; report a readable error
    # instead of failing later with "'NoneType' object is not subscriptable".
    raise ValueError('introduction does not start with "<name>，<date>": %r'
                     % introduction[:50])

print(zodic, a['name'])

def url2soup(url, headers=None, data=None, timeout=10):
    """Fetch *url* and return its content parsed as a BeautifulSoup object.

    The body is decoded with the first charset declared inside the document
    itself (as found by ``requests.utils.get_encodings_from_content``); when
    none is declared, the encoding detected from the raw bytes
    (``response.apparent_encoding``) is used. Undecodable bytes are replaced
    rather than raising.

    Args:
        url: Address of the page to fetch.
        headers: Optional mapping of HTTP headers to send.
        data: Optional request body. When truthy the request is a POST,
            otherwise a GET (so an empty dict or string still means GET).
        timeout: Seconds to wait for the server, passed to requests.
            Pass ``None`` to wait without limit.

    Returns:
        A ``bs4.BeautifulSoup`` tree built with the "lxml" parser.
    """
    if data:
        response = requests.post(url, data=data, headers=headers,
                                 timeout=timeout)
    else:
        response = requests.get(url, headers=headers, timeout=timeout)

    # NOTE: get_encodings_from_content is deprecated in requests 2.x; it is
    # kept here because it is what the original implementation relied on.
    declared = requests.utils.get_encodings_from_content(response.text)
    encoding = declared[0] if declared else response.apparent_encoding

    try:
        # apparent_encoding can be None (e.g. empty body); fall back to UTF-8.
        html = response.content.decode(encoding or 'utf-8', 'replace')
    except LookupError:
        # The page declared a charset name Python does not know; fall back to
        # the detected encoding instead of failing.
        html = response.content.decode(
            response.apparent_encoding or 'utf-8', 'replace')
    return bs4.BeautifulSoup(html, "lxml")
